version 16.1
clear all
cd "MYPATH\derived\nt_data_coverage"
adopath + ../../ado/

cap log close
log using analysis.log, replace

preliminaries
foreach PATH in RESULTS TEMP {
	cap mkdir "${`PATH'}\derived\nt_data_coverage"
    if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\derived\nt_data_coverage\figures"
}

graph set window fontface default
graph set ps fontface default
graph set window fontfacemono "Consolas"
graph set ps fontfacemono "Consolas"

program main
	prep_data
	share_do_nt
	*pr_share_do_nt
	*share_analysis_covered
	cap log close
end

program prep_data
	use $TEMP\derived\make_master_dataset\mfr_clean, clear
	sum preg_id_mfr
	local max_preg_id_mfr = r(max)
	
	use $DATA\master_dataset_clean, clear
	gen m_year = ym(year, month)
	format m_year %tm
	drop if year < 2011 
	drop if m_year > ym(2019, 11)	
	
	replace preg_id_mfr = . if in_MFR == 0
	tab in_MFR
	
	egen gr_preg_id_mfr = group(pregnancy) if mi(preg_id_mfr)
	replace preg_id_mfr = gr_preg_id_mfr + `max_preg_id_mfr' if mi(preg_id_mfr)
	drop gr_preg_id_mfr

	tempfile nt_data
	save `nt_data', replace
	
	use $TEMP\derived\make_master_dataset\mfr_clean, clear
	gen year = bfoddat_yr
	gen bfo_15 = mdy(bfoddat_month, 15, bfoddat_yr) 
	merge_admin
	rename lan lan_mfr
	drop year
	
	* Mother age from bakgrund
	qui {
		gen temp_mother_birth = mdy(fodelsemanad, 15, fodelsear)
		format temp_mother_birth %td
		personage temp_mother_birth bfo_15, gen(age_mfr)
	}
	gen m_year_mfr = ym(bfoddat_yr, bfoddat_month) 
	format m_year_mfr %tm
	merge 1:1 preg_id_mfr using `nt_data', assert(1 2 3) keep(1 3) 
	drop _merge
	
	* Drop MBR-only pregnancies outside the range of NT data
	drop if bfoddat_yr < 2011 & mi(test_id)
	drop if m_year_mfr > ym(2019, 11) & mi(test_id)
	replace m_year = m_year_mfr if mi(test_id)
	codebook m_year
	
	* Clean Lan
	codebook lan_mfr
	replace lan = lan_mfr if mi(lan)
	
	* Create "week10_date" 
	drop bfo_15
	gen bfo_15 = mdy(bfoddat_month, 15, bfoddat_yr) if mi(test_id)
	format bfo_15 undersokningsdatum %td
	gen week10_date = bfo_15 - 210 if mi(test_id) // for those in MBR but not NT data
	replace week10_date = date_preg - 210 if !mi(test_id)

	* Create wave data
	build_waves
	
	assert !mi(week10_date)
	gen wave = 1 if week10_date < dofm(wave2_intro)
	replace wave = 2 if week10_date >= dofm(wave2_intro)
	replace wave = 3 if week10_date >= dofm(wave3_intro)
	
	* Clean mother's age
	replace age = age_mfr if mi(test_id)

	* clean lan 
	* label define lan 1 "Stockholm" 3 "Uppsala" 4 "Södermanland" 5 "Östergötland" 6 "Jönköping" 7 "Kronoberg" 8 "Kalmar" 9 "Gotland" 10 "Blekinge" 12 "Skåne" 13 "Halland" 14 "Västra Götaland" 17 "Värmland" 18 "Örebro" 19 "Västmanland" 20 "Dalarna" 21 "Gävleborg" 22 "Västernorrland" 23 "Jämtland" 24 "Västerbotten" 25 "Norrbotten"
    * label values `from' lan
		
	* NT COVERAGE TYPE
	gen kub_type = 1 if inlist(lan, 3, 4, 9, 12, 14, 20, 21, 22, 23, 24) 
	replace kub_type = 2 if inlist(lan, 1, 5, 6, 7, 8, 13, 17, 18, 19)
	* Some Counties modified KUB Offers
	replace kub_type = 1 if lan == 19 & week10_date >= dofm(ym(2018, 1))
	replace kub_type = 2 if lan == 3 & week10_date >= dofm(ym(2016, 1))
	replace kub_type = 2 if lan == 20 & week10_date >= dofm(ym(2017, 11))
	replace kub_type = 2 if lan == 22 & week10_date >= dofm(ym(2015, 1))
	replace kub_type = 2 if lan == 23 & week10_date >= dofm(ym(2016, 4))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2014,6)) & week10_date <= dofm(ym(2014, 9))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2015,4)) & week10_date <= dofm(ym(2015, 9))
	
	replace kub_type = 3 if lan == 10 & wave == 2
	label define kub_t 1 "Age 35 KUB threshold" 2 "Universal KUB coverage" 3 "Age 38 KUB threshold"
	label values kub_type kub_t
	
	drop did_kub
	gen did_nt = 1 * (!mi(test_id))
	gen temp_date = dofm(m_year)
	format temp_date %d
	replace year = year(temp_date) if mi(year)
	
	keep lopnr preg_id_mfr test_id wave kub_type age lan did_nt m_year
end


program merge_admin
    di "Merge admin"
   merge m:1 lopnr year using "$MYPATH\MYPATH.dta", assert(1 2 3) keep(1 3) ///
	 generate(mother_in_tax_data) keepusing(lan)
   replace mother_in_tax_data = 1*(mother_in_tax_data == 3)
     
   merge m:1 lopnr using "MYPATH\MYPATH.dta",  ///
	 assert(1 2 3) keep(1 2 3) keepusing(fodelsear fodelsemanad)
   destring fodelsear, replace
   noi tab _merge 
   drop if _merge == 2
   drop _merge 
   label var fodelsear "Mother's year of birth (Bakgrund)"
   destring fodelsemanad, replace
   label var fodelsemanad "Mother's month of birth (Bakgrund)"
end


program build_waves
	di "Wave 2"
	gen wave2 = ym(2005,6) if lan == 1
	replace wave2 = ym(2010, 1) if lan == 3
	replace wave2 = ym(2012, 1) if lan == 4
	replace wave2 = ym(2007, 1) if lan == 5
	replace wave2 = ym(2009, 4) if lan == 6
	replace wave2 = ym(2012, 3) if lan == 7
	replace wave2 = ym(2012, 1) if lan == 8
	replace wave2 = ym(2005, 1) if lan == 9
	replace wave2 = ym(2008, 1) if lan == 10
	replace wave2 = ym(2008, 1) if lan == 12
	replace wave2 = ym(2017, 5) if lan == 13
	replace wave2 = ym(2008, 2) if lan == 14
	replace wave2 = ym(2010, 3) if lan == 17
	replace wave2 = ym(2008, 11) if lan == 18
	replace wave2 = ym(2017, 2) if lan == 19
	replace wave2 = ym(2011, 1) if lan == 20
	replace wave2 = ym(2008, 1) if lan == 21
	replace wave2 = ym(2012, 3) if lan == 22
	replace wave2 = ym(2009, 1) if lan == 23
	replace wave2 = ym(2009, 10) if lan == 24
	replace wave2 = ym(3000, 1) if lan == 25  // Norrbotten never introduced KUB
	format wave2 %tmMon_YY
	
	di "Wave 3"
	gen wave3 = ym(2016, 10) if lan == 1
	replace wave3 = ym(2016, 9) if lan == 3
	replace wave3 = ym(2017, 1) if lan == 4
	replace wave3 = ym(2016, 1) if lan == 5
	replace wave3 = ym(2018, 6) if lan == 6
	replace wave3 = ym(2017, 5) if lan == 7
	replace wave3 = ym(2017, 9) if lan == 8
	replace wave3 = ym(2016, 1) if lan == 9
	replace wave3 = ym(2016, 6) if lan == 10
	replace wave3 = ym(2017, 5) if lan == 12
	replace wave3 = ym(2017, 5) if lan == 13
	replace wave3 = ym(2017, 1) if lan == 14
	replace wave3 = ym(2018, 4) if lan == 17
	replace wave3 = ym(2018, 10) if lan == 18
	replace wave3 = ym(2017, 2) if lan == 19
	replace wave3 = ym(2017, 11) if lan == 20
	replace wave3 = ym(2017, 1) if lan == 21
	replace wave3 = ym(2017, 9) if lan == 22
	replace wave3 = ym(2017, 9) if lan == 23
	replace wave3 = ym(3000, 1) if lan == 24  // Vasterbotten never introduced NIPT
	replace wave3 = ym(3000, 1) if lan == 25  // Norrbotten never introduced NIPT
	format wave3 %tmMon_YY
	
	rename wave2 wave2_intro
	rename wave3 wave3_intro
end

program share_do_nt
	tempfile nt_cov_samp
	save `nt_cov_samp'
	di "Share pregs in MBR in region-months of each type of KUB coverage"
	qui count 
	local tot_preg = r(N)
	
	di "share universal"
	qui count if kub_type == 2 & wave != 1
	di r(N) / `tot_preg' * 100
	
	di "share 35+"
	qui count if kub_type == 1 & wave != 1
	di r(N) / `tot_preg' * 100
	
	di "share 38+"
	qui count if kub_type == 3 & wave != 1
	di r(N) / `tot_preg' * 100
	
	di "No NT coverage"
	qui count if wave == 1 | (lan == 10 & wave == 3)
	di r(N) / `tot_preg' * 100
	
	foreach gr in all 2019 {
		di "`gr'"
		di "________"
		use `nt_cov_samp', clear
		if "`gr'" == "2019" keep if m_year >= ym(2019,1)
		
		di "overall"
		sum did_nt
		
		di "Covered months"
		gen covered = 1 * (wave == 2 | wave == 3)
		replace covered = 0 if (lan == 10 & wave == 3)
		sum did_nt if covered == 1
		local share_cov = r(mean) * 100 / 0.8
		local n_cov = r(N)
		di "Covered months with universal coverage"
		sum did_nt if covered == 1 & kub_type == 2
		local share_univ = r(mean) * 100 / 0.8
		local n_univ = r(N)
		di "covered with age cutoff at 35"
		sum did_nt if covered == 1 & kub_type == 1
		local share_35 = r(mean) * 100 / 0.8
		local n_35 = r(N)
		di "uncovered" 
		sum did_nt if covered == 0
		local share_uncov = r(mean) * 100 / 0.8
		local n_uncov = r(N)
		matrix share_nt = (`share_cov' \ `share_univ' \ `share_35' \ `share_uncov') , ///
		  (`n_cov' \ `n_univ' \ `n_35' \ `n_uncov')
		
		frmttable using "${RESULTS}\derived\nt_data_coverage\share_nt_`gr'.tex", statmat(share_nt) ///
		  rtitles("Covered" \ "Covered w Universal" \ "Covered 35+" \ "Uncovered")  ///
		  ctitles("" "Share do NT" "N pregs") ///
		  tex sdec(3) replace 
		
		matrix drop share_nt
		* Covered with age cutoff at 35 by age
		keep if covered == 1 & kub_type == 1
		local bin_size = 5
		qui gen age_bin = .
		replace age_bin = 1 if age < 20
		replace age_bin = 2 if age >= 20 & age < 25
		replace age_bin = 3 if age >= 25 & age < 30
		replace age_bin = 4 if age >= 30 & age < 35
		replace age_bin = 5 if age >= 35 & age < 40
		replace age_bin = 6 if age >= 40 

		
		collapse (mean) did_nt (count) n_preg = did_nt , by(age_bin)
		replace did_nt = did_nt * 100 / 0.8
		
		scatter did_nt age_bin, xlabel(1 "< 20" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40+")
		graph export "${RESULTS}\derived\nt_data_coverage\figures/share_nt_cov35_age_`gr'.pdf", as(pdf) replace 
	}
	
	
end	

program pr_share_do_nt
	use in_MFR in_PregR glopnr lopnr childid num_womb first_visit_date dob dob_flag fodelsedatum ///
	  bfoddat bp_datum bpsm bpsm_mhv1 ///
	  bpul_mhv1 bpul_inskr bpet alder_vid_forlossning gest_weeks_preg gest_days_preg ///
	  discharge_date_preg clinic county birth_order kvinnan_genomgatt_kub kvinnan_genomgatt_nipt /// 
	  kvinnan_genomgatt_kvb dodfott early_death gl_vid_inskrivning_veckor ///
	  kvinnan_genomgatt_amniocentes kvinnan_genomgatt_ultraljuds_un termination_clean* ///
	  termination_flag baby_diagnosis* birthweight_preg graviditetsreg_avslutas_orsak ///
	  graviditetsreg_avslutas avled_datum registreringsdatum_inskrivning apgar_5_min /// 
	  using "MYPATH\MYPATH.dta", clear
	}
	
	* Build waves 
	rename county lan
	gen week10_date = preg_end_date - 210 
	build_waves
	
	gen wave = 1 if week10_date < dofm(wave2_intro)
	replace wave = 2 if week10_date >= dofm(wave2_intro)
	replace wave = 3 if week10_date >= dofm(wave3_intro)
		
	* NT COVERAGE TYPE
	gen kub_type = 1 if inlist(lan, 3, 4, 9, 12, 14, 20, 21, 22, 23, 24) 
	replace kub_type = 2 if inlist(lan, 1, 5, 6, 7, 8, 13, 17, 18, 19)
	* Some Counties modified KUB Offers
	replace kub_type = 1 if lan == 19 & week10_date >= dofm(ym(2018, 1))
	replace kub_type = 2 if lan == 3 & week10_date >= dofm(ym(2016, 1))
	replace kub_type = 2 if lan == 20 & week10_date >= dofm(ym(2017, 11))
	replace kub_type = 2 if lan == 22 & week10_date >= dofm(ym(2015, 1))
	replace kub_type = 2 if lan == 23 & week10_date >= dofm(ym(2016, 4))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2014,6)) & week10_date <= dofm(ym(2014, 9))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2015,4)) & week10_date <= dofm(ym(2015, 9))

	keep if preg_end_date_yr == 2019
	rename did_kub did_nt 
	drop if termination_clean == 1
	
	di "Share overall out of 2019"
	tab did_nt, missing
	
	di "Covered months"
	qui gen covered = 1 * (wave == 2 | wave == 3)
	qui replace covered = 0 if (lan == 10 & wave == 3)
	tab did_nt if covered == 1
	
	di "Covered months with universal coverage"
	tab did_nt if covered == 1 & kub_type == 2
	
	di "covered with age cutoff at 35"
	tab did_nt if covered == 1 & kub_type == 1
	
	di "uncovered" 
	tab did_nt if covered == 0
	
	* Covered with age cutoff at 35 by age
	keep if covered == 1 & kub_type == 1
	qui gen age_bin = .
	replace age_bin = 1 if alder_vid_forlossning < 20
	replace age_bin = 2 if alder_vid_forlossning >= 20 & alder_vid_forlossning < 25
	replace age_bin = 3 if alder_vid_forlossning >= 25 & alder_vid_forlossning < 30
	replace age_bin = 4 if alder_vid_forlossning >= 30 & alder_vid_forlossning < 35
	replace age_bin = 5 if alder_vid_forlossning >= 35 & alder_vid_forlossning < 40
	replace age_bin = 6 if alder_vid_forlossning >= 40 

	collapse (mean) did_nt (count) n_preg = did_nt , by(age_bin)
	replace did_nt = did_nt * 100
	
	scatter did_nt age_bin, xlabel(1 "< 20" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40+")
	graph export "${RESULTS}\nt_data_coverage\figures/pregreg_share_nt_cov35_age_2019.pdf", as(pdf) replace 
	
end 


program clean_preg_reg
   format lopnr glopnr %15.0f
	rename registreringsdatum_inskrivning registration_date_pregR
    
	di "Extract Death date and Registration date"
	extract_date, var(avled_datum)
	extract_date, var(registration_date_pregR)
	extract_date, var(fodelsedatum)
	foreach var in avled_datum registration_date_pregR {
	    drop `var'_mo `var'_yr
	}

    di "For nonsingleton births, take the max date of dobs"
    sort glopnr birth_order, stable
    by glopnr: replace dob = dob[_n-1] if dob != dob[_n-1] ///
        & !mi(dob[_n-1]) & !mi(dob)
   
    di "Gen pregnancy end date"
    gen preg_end_date = cond(!mi(dob), dob, .)
    foreach date in bpet bpsm bpsm_mhv1 bp_datum bpul_mhv1 bpul_inskr {
        extract_date, var(`date')
        replace preg_end_date = `date' - 280 + 22 * 7 if mi(preg_end_date) & !mi(`var')
    }
    extract_date, var(preg_end_date)
    recode_county, from(county)
    
    di "Define test dummies"
    rename (kvinnan_genomgatt_kub kvinnan_genomgatt_nipt kvinnan_genomgatt_kvb ///
kvinnan_genomgatt_amniocentes kvinnan_genomgatt_ultraljuds_un) (kub nipt cv amnio ultrasound)
    
    label def did_test 1 "Yes" 0 "No" .p "Don't know, var populated" .d "Don't know, var missing"
    
    foreach test in kub nipt cv amnio ultrasound {
        gen did_`test' = 1 if `test' == "Ja"
        replace did_`test' = 0 if `test' == "Nej"
        replace did_`test' = .p if `test' == "Vet ej"
        label val did_`test' did_test
        drop `test'
    }
    
    di "Baby's diagnoses" 
    di "(removing duplicated diagnosis codes)"
    sort lopnr glopnr birth_order
    gen bdiag = ""
    replace baby_diagnosis_preg = strtrim(baby_diagnosis_preg)
    forval i = 1/4 {
        by lopnr glopnr: gen bdiag`i' = baby_diagnosis_preg[`i'] if num_womb >= `i' & !mi(baby_diagnosis_preg[`i'])
        split bdiag`i', parse(,) g(bd_)
        drop bdiag`i' 
        foreach var of varlist bd_* {
            replace `var' = strtrim(`var')
            replace bdiag = bdiag + "," + `var' if !mi(`var') & !mi(bdiag) ///
                & strpos(bdiag, `var') == 0
            replace bdiag = `var' if !mi(`var') & mi(bdiag)
            drop `var'
        }
    }
    replace baby_diagnosis_preg = bdiag
    drop bdiag
    
	di "Reason for Termination"
	encode graviditetsreg_avslutas_orsak if !mi(graviditetsreg_avslutas_orsak), ///
	  gen(termination_reason) 
	drop graviditetsreg_avslutas_orsak
	label var termination_reason "Reason for termination (raw var from PregR)"
	label define reason 1 "Aborted bc Fetal Abnormality" 2 " Miscarriage prior to week 22" 3 "Aborted (other)"
	label val termination_reason reason
	
    drop birth_order
    duplicates drop
    unique lopnr glopnr bfoddat
    save "$TEMP\make_master_dataset\gravreg.dta", replace


end

 program extract_date
        syntax, var(varname)
        di "Extracting date in [`var']..."
        quietly {
            local vartype: type `var'
            tempvar orig 
            if strpos("`vartype'", "str") > 0 {
                clonevar `orig' = `var'
                drop `var'
                noi di "Variable format is string YYYY-MM-DD"
                noi di "Processing..."
                gen `var' = date(`orig', "YMD")
            }
            else if "`vartype'" == "double" {
                clonevar `orig' = `var'
                drop `var'
                noi di "Variable format is datetime"
                noi di "Processing..."
                gen `var' = dofc(`orig')
            }
            format `var' %tdCCYY-NN-DD
            gen `var'_yr = year(`var')
            gen `var'_mo = month(`var')
        }
    end

    program recode_county
        syntax, from(varname)
        di "Clean lan"
        quietly {
            replace `from' = "1" if strpos(`from', "Stockholm") > 0
            replace `from' = "3"  if strpos(`from', "Uppsala") > 0
            replace `from' = "4" if strpos(`from', "Södermanland") > 0 | strpos(`from', "Sörmland") > 0
            replace `from' = "5" if strpos(`from', "Östergötland") > 0
            replace `from' = "6" if strpos(`from', "Jönköping") > 0
            replace `from' = "7" if strpos(`from', "Kronoberg") > 0
            replace `from' = "8" if strpos(`from', "Kalmar") > 0
            replace `from' = "9" if strpos(`from', "Gotland") > 0
            replace `from' = "10" if strpos(`from', "Blekinge") > 0 
            replace `from' = "12" if strpos(`from', "Skåne") > 0 
            replace `from' = "13" if strpos(`from', "Halland") > 0 
            replace `from' = "14" if strpos(`from', "Västra Götaland") > 0 
            replace `from' = "17" if strpos(`from', "Värmland") > 0 
            replace `from' = "18" if strpos(`from', "Örebro") > 0 
            replace `from' = "19" if strpos(`from', "Västmanland") > 0
            replace `from' = "20" if strpos(`from', "Dalarna") > 0 
            replace `from' = "21" if strpos(`from', "Gävleborg") > 0 
            replace `from' = "22" if strpos(`from', "Västernorrland") > 0 
            replace `from' = "23" if strpos(`from', "Jämtland") > 0 
            replace `from' = "24" if strpos(`from', "Västerbotten") > 0 
            replace `from' = "25" if strpos(`from', "Norrbotten") > 0
        }
        destring `from', replace
        
        label define lan 1 "Stockholm" 3 "Uppsala" 4 "Södermanland" 5 "Östergötland" 6 "Jönköping" 7 "Kronoberg" 8 "Kalmar" 9 "Gotland" 10 "Blekinge" 12 "Skåne" 13 "Halland" 14 "Västra Götaland" 17 "Värmland" 18 "Örebro" 19 "Västmanland" 20 "Dalarna" 21 "Gävleborg" 22 "Västernorrland" 23 "Jämtland" 24 "Västerbotten" 25 "Norrbotten"
        label values `from' lan
        
        /* 
        Note: lan = 11, 15, 16 are not identified. Although not implemented here,
        but - it turns out that lan = 11, 15, 16 belong to lan = 14. You can just
        google "Sweden counties" and you'll find it on the Wikipedia page. 
        */
end


program share_analysis_covered
	use $DATA/analysis_sample.dta, clear
		* NT COVERAGE TYPE
	gen kub_type = 1 if inlist(lan, 3, 4, 9, 12, 14, 20, 21, 22, 23, 24) 
	replace kub_type = 2 if inlist(lan, 1, 5, 6, 7, 8, 13, 17, 18, 19)
	* Some Counties modified KUB Offers
	replace kub_type = 1 if lan == 19 & week10_date >= dofm(ym(2018, 1))
	replace kub_type = 2 if lan == 3 & week10_date >= dofm(ym(2016, 1))
	replace kub_type = 2 if lan == 20 & week10_date >= dofm(ym(2017, 11))
	replace kub_type = 2 if lan == 22 & week10_date >= dofm(ym(2015, 1))
	replace kub_type = 2 if lan == 23 & week10_date >= dofm(ym(2016, 4))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2014,6)) & week10_date <= dofm(ym(2014, 9))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2015,4)) & week10_date <= dofm(ym(2015, 9))
	
	di "Analysis sample total"
	count
	
	di "Covered months"
	qui gen covered = 1 * (wave == 2 | wave == 3)
	qui replace covered = 0 if (lan == 10 & wave == 3)
	count if covered == 1
	
	di "Covered months with universal coverage"
	count if covered == 1 & kub_type == 2
	
	di "covered with age cutoff at 35"
	count if covered == 1 & kub_type == 1
	
	di " ----- Covered with age cutoff at 35, <35 "
	count if covered == 1 & kub_type == 1 & age < 35
	
	di " ----- Covered with age cutoff at 35, 35+ "
	count if covered == 1 & kub_type == 1 & age >= 35
	
	di "uncovered" 
	count if wave == 1
	

end

* EXECUTE
main


